Introduction

Hi team! This is my first true stab at the data. I addressed 4 different hypotheses, each with their own sections. The first hypothesis do not seem to have any ‘positive’ outcome however the last three show some promise. With full intention to bias you, the underdog hypothesis (number 3) is my favorite.

I will step through each of the hypotheses in order, with their corresponding R-code and outputs.

At the end, I have a quick mention to the Inter-Rater analysis.

Here is also a link to the git-hub repository containing my code and the raw data spreadsheets if you would like to take a look. I may have to give you permission to access them, apologies for the extra step: [Click here for Git-hub code link] (https://github.com/paigeliebel/Soccer_Touch.git)

For readability, the code required for cleaning, merging, and fixing typos in the data frames was hidden from this report. You can see the code in the github.

Hypothesis 1: Touch as a Metric for End of Season Success

Core Hypothesis: Pro-social touches between teammates may serve as an indicator for overall team cohesion. Therefore, we propose that teams with a greater frequency of these interactions will secure higher positions in the season’s final standings.

Sub-Hypothesis 1: Teams that show less variability across matches throughout a season with regards to their touch will secure higher positions in the season’s final standings.

For the Sub-Hypothesis the touches were scaled for each team based on distance from mean (MAD-based z-score). Some teams are ‘touchier’ than others.

Definitions:

Pro-social touches are defined as all haptic rituals recorded excluding: Tactical Adjustments, Collisions, and Negative Touch

Situations excluded from analysis for this hypothesis: Goals For/Against, Substitutions

library(tidyverse)
library(data.table)
library(broom)
library(janitor)
library(readxl)
library(rmarkdown)
library(readr)
library (dplyr)

Exclude_Touch <- c("TA", "CO", "NEG")
Exclude_Situation <- c("GF", "GA", "SUB") 
#Exclude_Visibility <- c("P")

#Creates data set for core hypothesis analysis
Touches_CoreHyp <- Touches_final %>%
  filter(!(HapticRitual %in% Exclude_Touch)) %>%
  filter(!(Situation %in% Exclude_Situation)) #%>% 
  #filter(!(Visibility %in% Exclude_Visibility))

#Count of frequency of touches per team
Touches_by_team <- Touches_CoreHyp %>%
  mutate(Team = str_trim(as.character(Team))) %>%
  count(Team, name = "TotalTouches")

# Make sure TeamID is padded to match
FinalStandings <- FinalStandings %>%
  mutate(TeamID = str_pad(as.character(TeamID), width = 2, pad = "0"))

#Join touch counts with final standings
Team_Touches_Standings <- FinalStandings %>%
  left_join(Touches_by_team, by = c("TeamID" = "Team")) %>%
  filter(!is.na(TotalTouches))

#Plot with regression line : final rankings to frequency of touch
TouchFreq_vs_FinalStandings <- ggplot(Team_Touches_Standings, aes(x = Rank, y = TotalTouches)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "blue", linewidth = 1) +
  scale_x_reverse() +
  labs(
    title = "Final Rank vs Overall Touch Frequency",
    x = "Final Season Rank",
    y = "Total Touches (Filtered)"
  ) +
  theme_minimal()

TouchFreq_vs_FinalStandings_Stats <- cor_result <- cor.test(Team_Touches_Standings$TotalTouches, Team_Touches_Standings$Rank)

Results for Hypothesis 1:

## 
##  Pearson's product-moment correlation
## 
## data:  Team_Touches_Standings$TotalTouches and Team_Touches_Standings$Rank
## t = -0.072115, df = 12, p-value = 0.9437
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5453701  0.5154585
## sample estimates:
##         cor 
## -0.02081319

Sub-Hypothesis 1

########## Within-Team Variability in Touch Frequency ########## 

# Looks at the variability a team has across matches throughout the season

# Count touches per team per game from CoreHyp data frame
Touches_per_game <- Touches_CoreHyp %>%
  group_by(Team, SeasonMatchNumber) %>%
  summarise(TouchCount = n(), .groups = "drop")

# Computing Within-Team Variability
Team_touch_variability <- Touches_per_game %>%
  group_by(Team) %>%
  summarise(
    MeanTouches = mean(TouchCount),
    SDTouches = sd(TouchCount),
    MinTouches = min(TouchCount),
    MaxTouches = max(TouchCount),
    NumGames = n(),
    .groups = "drop"
  )

# Join season rank to each team for ordering in the plot
Touches_per_game_ranked <- Touches_per_game %>%
  mutate(Team = str_pad(as.character(Team), width = 2, pad = "0")) %>%
  left_join(FinalStandings %>% select(TeamID, Rank), by = c("Team" = "TeamID")) %>%
  filter(!is.na(Rank))  # make sure we only include ranked teams

# Visualize 
TouchesPerGame_vs_rank <- ggplot(Touches_per_game_ranked, aes(x = Rank, y = TouchCount, group = Rank)) +
  geom_boxplot(fill = "lightblue", color = "black") +
  scale_x_reverse(breaks = 1:14) +  # clean 1–14 axis
  labs(
    title = "Variation of Within-Team Touch Frequency per Game",
    x = "Team (Ordered by Final Rank)",
    y = "Touches per Game"
  ) +
  theme_minimal()

# Scale touch based on distance from mean (MAD-based z-score)
# How extreme a touch count is compared to team's norm
Touches_scaled <- Touches_per_game %>%
  group_by(Team) %>%
  mutate(
    MedianTouch = median(TouchCount),
    MAD = mad(TouchCount),  # median absolute deviation
    ScaledTouch = (TouchCount - MedianTouch) / MAD
  ) %>%
  ungroup()

# Join Touches_scaled with ranks
Touches_scaled_ranked <- Touches_scaled %>%
  mutate(Team = str_pad(as.character(Team), width = 2, pad = "0")) %>%
  left_join(FinalStandings %>% select(TeamID, Rank), by = c("Team" = "TeamID")) %>%
  filter(!is.na(Rank))

# Plot of MAD
MAD_TouchesPerGame_vs_rank <- ggplot(Touches_scaled_ranked, aes(x = Rank, y = ScaledTouch, group = Rank)) +
  geom_boxplot(fill = "lightblue", color = "black") +
  geom_hline(yintercept = c(-2, 2), linetype = "dashed", color = "red") +
  scale_x_reverse(breaks = 1:14) +  # clean 1–14 axis
  labs(
    title = "Scaled Touch Deviation from Team Median",
    subtitle = "Boxplot of (TouchCount - Median) / MAD per Team",
    x = "Team (Ordered by Final Rank)",
    y = "Scaled Touch Value (MAD Units)"
  ) +
  theme_minimal()

########## Within-Team Variability in Touch Frequency vs Ranking ########## 

# Join variability data to final standings
Variability_vs_Rank <- Team_touch_variability %>%
  mutate(Team = str_pad(as.character(Team), width = 2, pad = "0")) %>%
  left_join(FinalStandings %>% select(TeamID, Rank), by = c("Team" = "TeamID")) %>%
  filter(!is.na(Rank))

# Plot SDTouches vs Rank
Within_Variability_vs_Rank <- ggplot(Variability_vs_Rank, aes(x = Rank, y = SDTouches)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "blue", linewidth = 1) +
  scale_x_reverse(breaks = 1:14) +
  labs(
    title = "Team Variability in Touch vs Final Rank",
    x = "Final Season Rank",
    y = "Touch Frequency Variability (SD)"
  ) +
  theme_minimal()

Within_Variability_vs_Rank_Stats <- cor.test(Variability_vs_Rank$SDTouches, Variability_vs_Rank$Rank)

Results for Sub-Hypothesis 1

Hypothesis 2: Inter-Match Variability

The frequency of pro-social touch across teams may vary due to confounding variables such as team culture or tactics. Therefore, instead of analyzing team to team variations, we suggest that the inter-match variability of pro-social touches within an individual team could be used as an indicator for individual match outcomes. We propose that teams with more pro-social touches within a match than their respective season average are more likely to have a higher goal differential.

Note: Whereas hypothesis 1 looked at outcomes for the whole season, hypothesis 2 is looking at a match level outcome.

Additionally, I ran a Wilcoxon test in fear that the results were perhaps not nicely distributed.

# Inter-Match Variability Hypothesis
# For more information on these data frames please look at the README.md file

library(tidyverse)
library(data.table)
library(broom)
library(janitor)
library(readxl)
library(rmarkdown)
library(readr)
library (dplyr)

#Ensure to use the correct dfs. Touches_final and Matches_final are correct. They only include assigned rater data, no repeat matches

#Check to make sure data frames are loaded:
if (!exists("Touches_final") | !exists("Matches_final") | !exists("FinalStandings") | !exists("Touches_CoreHyp")) {
  stop("Touches_final, Matches_final, Touhe_CoreHyp or FinalStandings not loaded. Check Data_Management.R and Core_Hypothesis.R.")
}

############## Inter-Match Variability Hypothesis ############## 

# Basically asking: "When a team is more (or less) touchy than usual, do they score more or less goals than their opponent?"
# Note that this is still using same CoreHyp touches (therefore only prosocial touches and not including GF, GA, Subs etc)

#Clean Match column data for use
Matches_final_cleaned <- Matches_final %>%
  mutate(
    GoalsFor = as.numeric(str_trim(GoalsFor)),
    GoalsAgainst = as.numeric(str_trim(GoalsAgainst))
  )

#Get TeamID into Matches_final
Matches_finalID <- Matches_final_cleaned %>%
  mutate(
    MatchID = str_pad(MatchID, width = 4, pad = "0"),  # in case it was shortened
    TeamID = str_sub(MatchID, 1, 2),                     # preserve leading zeros
    GoalDiff = GoalsFor - GoalsAgainst
  )

# Prosocial touches per team per match
Touches_per_match <- Touches_CoreHyp %>%
  group_by(Team, SeasonMatchNumber) %>%
  summarise(TouchCount = n(), .groups = "drop")

# Average touch per team over the season
Team_season_avg <- Touches_per_match %>%
  group_by(Team) %>%
  summarise(SeasonAvgTouch = mean(TouchCount), .groups = "drop")

# Merge & calculate scaled deviation from average
Touches_scaled_dev <- Touches_per_match %>%
  left_join(Team_season_avg, by = "Team") %>%
  mutate(ScaledAboveAvg = TouchCount - SeasonAvgTouch) # positive = more touchy than average, neg = less touchy than average

# Joins dataframes (goal differetials to the touches scaled)
Touch_GoalDiff_Analysis <- Touches_scaled_dev %>%
  left_join(
    Matches_finalID %>% select(SeasonMatchNumber, TeamID, GoalDiff),
    by = c("SeasonMatchNumber", "Team" = "TeamID")
  )

# Visualize 
Touch_GoalDiff_Analysis_Graph <- ggplot(Touch_GoalDiff_Analysis, aes(x = ScaledAboveAvg, y = GoalDiff)) +
  geom_point(size = 2, alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Touch Count Deviation vs Match Goal Differential",
    x = "Touches Above/Below Team Average",
    y = "Goal Differential",
    caption = "Note: Each dot represents one match outcome for a team, therefore 2 dots for each match"
  ) +
  theme_minimal()

# Pearson
Touch_GoalDiff_Analysis_Stats <- cor.test(Touch_GoalDiff_Analysis$ScaledAboveAvg, Touch_GoalDiff_Analysis$GoalDiff)

############## Wilcoxon: Inter-Match Variability Hypothesis ############## 

# Wilcoxon: Perhaps not nicely distributed: “Do teams tend to have higher goal differentials when they are more touchy than their season average?”

# Create AboveAvgTouch flag
Touch_AboveBelow_Analysis <- Touch_GoalDiff_Analysis %>%
  mutate(AboveAvgTouch = ScaledAboveAvg > 0)

# Summary of goal differentials by touch group
Touch_AboveBelow_Analysis %>%
  group_by(AboveAvgTouch) %>%
  summarise(
    n = n(),
    mean_GD = mean(GoalDiff, na.rm = TRUE),
    median_GD = median(GoalDiff, na.rm = TRUE),
    sd_GD = sd(GoalDiff, na.rm = TRUE)
  )
## # A tibble: 2 × 5
##   AboveAvgTouch     n mean_GD median_GD sd_GD
##   <lgl>         <int>   <dbl>     <dbl> <dbl>
## 1 FALSE           202  -0.444        -1  1.52
## 2 TRUE            162   0.581         1  1.69
# Wilcoxon rank-sum test
Wilcox_Test <- wilcox.test(GoalDiff ~ AboveAvgTouch, data = Touch_AboveBelow_Analysis)

# Visualize Wilcoxon
Wilcox_Test_Graph <- ggplot(Touch_AboveBelow_Analysis, aes(x = AboveAvgTouch, y = GoalDiff)) +
  geom_boxplot(fill = "lightblue") +
  scale_x_discrete(labels = c("FALSE" = "Below Avg Touch", "TRUE" = "Above Avg Touch")) +
  labs(
    title = "Goal Differential by Above/Below Avg Touch",
    x = "Above Team's Avg Touch?",
    y = "Goal Differential"
  ) +
  theme_minimal()

Results for Hypothesis 2:

## 
##  Pearson's product-moment correlation
## 
## data:  Touch_GoalDiff_Analysis$ScaledAboveAvg and Touch_GoalDiff_Analysis$GoalDiff
## t = 5.6914, df = 312, p-value = 2.906e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2028881 0.4036665
## sample estimates:
##      cor 
## 0.306685
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  GoalDiff by AboveAvgTouch
## W = 7714, p-value = 1.949e-08
## alternative hypothesis: true location shift is not equal to 0

Hypothesis 3: Underdog Hypothesis

Teams with greater frequencies of pro-social touch when competing against higher ranked teams will secure a better goal differential than teams with fewer touch instances facing an opponent of the same spread in rankings.

Definition of spread: Spread in rankings is defined as the difference in current standings the teams have between each other. Therefore, the 1st ranked team competing against the 7th ranked team has the same spread as the 7th ranked team playing against the 14th ranked team. This could expose a team’s cohesion by how they react when facing adversity.

Touch is scaled again using MAD.

I used both an ANOVA (which described nothing of interest) and a GAM model (which I love).

Note: First 13 matches of season are excluded. Not all teams play the first weekend of the season. Therefore, the current standings only have a ‘comprehensive picture’ going into the 3rd weekend. In other words, after match 13, at least every team has completed 1 game.

Additionally, for the categorical analysis I arbitrarily determined that a spread between 0-7 was a “mild underdog” or a “mild favorite. A spread greater than 7 was considered”major”. This is simply because there are 14 teams in the league and a spread of 7 would place a team on teh other half of the table.

# Underdog Hypothesis

library(tidyverse)
library(data.table)
library(broom)
library(janitor)
library(readxl)
library(rmarkdown)
library(readr)
library (dplyr)
library(plotly)
library(mgcv)
library(ggplot2)
library(forcats)

#Ensure to use the correct dfs. Touches_final and Matches_final are correct. They only include assigned rater data, no repeat matches

#Check to make sure data frames are loaded:

if (!exists("Touches_final") | !exists("Touches_scaled") | !exists("Matches_finalID") | !exists("FinalStandings") | !exists("Touches_CoreHyp")) {
  stop("Touches_final, Matches_final, Touhe_CoreHyp or FinalStandings not loaded. Check Data_Management.R and Core_Hypothesis.R.")
}

############################ Underdog Hypothesis ############################

# Clean Match column data for use
Matches_final_cleaned_CurrentStandings <- Matches_final %>%
  mutate(
    GoalsFor = as.numeric(str_trim(GoalsFor)),
    GoalsAgainst = as.numeric(str_trim(GoalsAgainst)),
    CurrentStanding = as.numeric(str_trim(CurrentStanding)),
    MatchID = str_pad(MatchID, width = 4, pad = "0"),  # ensure 4-digit MatchID
    TeamID = str_sub(MatchID, 1, 2),                   # extract TeamID from MatchID
    SeasonMatchNumber = as.numeric(SeasonMatchNumber)  # ensure it can be compared numerically
  ) %>%
  filter(SeasonMatchNumber > 13)  # exclude first 13 matches

# Get the spread into the info for each team
Matches_final_Spread <- Matches_final_cleaned_CurrentStandings %>%
  rename_with(~ paste0(.x, "_self")) %>% #renames every column so that you know which row refers to the self team of analysis
  inner_join(
    Matches_final_cleaned_CurrentStandings,
    by = c("SeasonMatchNumber_self" = "SeasonMatchNumber") # joins data frame to itself, matching each game via seasonmatchnumber (_self is of interst) (wihtout is opponent)
  ) %>%
  filter(TeamID_self != TeamID) %>%  # Make sure we’re not joining a row to itself
  mutate(
    GoalDiff = GoalsFor_self - GoalsAgainst_self,
    Spread = CurrentStanding - CurrentStanding_self  # positive = better ranked than opponent, negative = underdog
  ) %>%
  select(
    SeasonMatchNumber = SeasonMatchNumber_self,
    MatchID = MatchID_self,
    TeamID = TeamID_self,
    GoalDiff,
    Spread,
    CurrentStanding = CurrentStanding_self,
    OpponentTeamID = TeamID,
    OpponentStanding = CurrentStanding
  )

#Correcting data types
Touches_scaled_numeric <- Touches_scaled %>%
  mutate(
    SeasonMatchNumber = as.numeric(SeasonMatchNumber),
    Team = as.character(Team)  # just to ensure consistency
  )

# Data frame creation for Underdog analysis
Underdog_Analysis <- Matches_final_Spread %>%
  left_join(
    Touches_scaled_numeric,
    by = c("SeasonMatchNumber", "TeamID" = "Team")
  )

############################ Observed Data Table Summary | Underdog Hypothesis ############################

spread_cutoff <- 7 #arbitrary spread number: I like 7 because it separates the table in half (1st rank team playing against bottom half of table)

# Categorize real match data into underdog/favored + touch level
Underdog_Observed_Summary <- Underdog_Analysis %>%
  filter(!is.na(GoalDiff) & !is.na(ScaledTouch) & !is.na(Spread)) %>%  # ensure clean data
  mutate(
    SpreadGroup = case_when(
      Spread <= -spread_cutoff ~ "Major Underdog",
      Spread > -spread_cutoff & Spread < 0 ~ "Mild Underdog",
      Spread == 0 ~ "Even",
      Spread > 0 & Spread < spread_cutoff ~ "Mild Favorite",
      Spread >= spread_cutoff ~ "Major Favorite"
    ),
    TouchGroup = case_when(
      ScaledTouch >= 1 ~ "High Touch",
      ScaledTouch <= -1 ~ "Low Touch",
      TRUE ~ "Average Touch"
    )
  ) %>%
  group_by(SpreadGroup, TouchGroup) %>%
  summarise(
    MeanObservedGD = mean(GoalDiff, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  arrange(SpreadGroup, TouchGroup)

#Bar chart for this data: I think easier to understand than the cool looking 3D chart generated below.
# Set factor levels for order
spread_levels <- c("Major Underdog", "Mild Underdog", "Even", "Mild Favorite", "Major Favorite")
touch_levels <- c("Low Touch", "Average Touch", "High Touch")

# Make sure SpreadGroup and TouchGroup are ordered
Underdog_Observed_Summary <- Underdog_Observed_Summary %>%
  mutate(
    SpreadGroup = factor(SpreadGroup, levels = spread_levels),
    TouchGroup = factor(TouchGroup, levels = touch_levels)
  )

# Plot observed data
Barchart_Categorical_Data <- ggplot(Underdog_Observed_Summary, aes(x = SpreadGroup, y = MeanObservedGD, fill = TouchGroup)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_text(
    aes(label = paste0("n=", n)),
    position = position_dodge(width = 0.8),
    vjust = ifelse(Underdog_Observed_Summary$MeanObservedGD >= 0, -0.5, 1.2),
    size = 3.5
  ) +
  scale_fill_brewer(palette = "Blues") +
  labs(
    title = "Observed Goal Differential by Underdog/Favorite Status and Touch Level",
    x = "Underdog/Favorite Status (Spread Group)",
    y = "Mean Goal Differential",
    fill = "Touch Level"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Statistically compare goal differentials across spreadgroup and touchgroup
# Via a two-way ANOVa 
# Make sure grouping variables are factors
Underdog_Observed_ANOVA <- Underdog_Analysis %>%
  filter(!is.na(GoalDiff) & !is.na(ScaledTouch) & !is.na(Spread)) %>%
  mutate(
    SpreadGroup = case_when(
      Spread <= -spread_cutoff ~ "Major Underdog",
      Spread > -spread_cutoff & Spread < 0 ~ "Mild Underdog",
      Spread == 0 ~ "Even",
      Spread > 0 & Spread < spread_cutoff ~ "Mild Favorite",
      Spread >= spread_cutoff ~ "Major Favorite"
    ),
    TouchGroup = case_when(
      ScaledTouch >= 1 ~ "High Touch",
      ScaledTouch <= -1 ~ "Low Touch",
      TRUE ~ "Average Touch"
    ),
    SpreadGroup = factor(SpreadGroup, levels = spread_levels),
    TouchGroup = factor(TouchGroup, levels = touch_levels)
  )

# Run Two-Way ANOVA
anova_result <- aov(GoalDiff ~ SpreadGroup * TouchGroup, data = Underdog_Observed_ANOVA)
summary(anova_result)
##                         Df Sum Sq Mean Sq F value   Pr(>F)    
## SpreadGroup              3  126.9   42.31  18.414 6.32e-11 ***
## TouchGroup               2   46.5   23.25  10.116 5.74e-05 ***
## SpreadGroup:TouchGroup   6    7.8    1.30   0.566    0.757    
## Residuals              278  638.8    2.30                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(anova_result)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = GoalDiff ~ SpreadGroup * TouchGroup, data = Underdog_Observed_ANOVA)
## 
## $SpreadGroup
##                                    diff        lwr      upr     p adj
## Mild Underdog-Major Underdog  0.8757993 0.13593563 1.615663 0.0129308
## Mild Favorite-Major Underdog  1.4926217 0.75275806 2.232485 0.0000022
## Major Favorite-Major Underdog 2.3684211 1.46959719 3.267245 0.0000000
## Mild Favorite-Mild Underdog   0.6168224 0.08118058 1.152464 0.0166569
## Major Favorite-Mild Underdog  1.4926217 0.75275806 2.232485 0.0000022
## Major Favorite-Mild Favorite  0.8757993 0.13593563 1.615663 0.0129308
## 
## $TouchGroup
##                               diff        lwr      upr     p adj
## Average Touch-Low Touch  0.8187351 0.17132017 1.466150 0.0087855
## High Touch-Low Touch     1.4354064 0.67500680 2.195806 0.0000372
## High Touch-Average Touch 0.6166713 0.07947207 1.153871 0.0197897
## 
## $`SpreadGroup:TouchGroup`
##                                                                 diff
## Mild Underdog:Low Touch-Major Underdog:Low Touch           0.5220588
## Mild Favorite:Low Touch-Major Underdog:Low Touch           1.1607143
## Major Favorite:Low Touch-Major Underdog:Low Touch          3.1250000
## Major Underdog:Average Touch-Major Underdog:Low Touch      0.7950000
## Mild Underdog:Average Touch-Major Underdog:Low Touch       1.5099206
## Mild Favorite:Average Touch-Major Underdog:Low Touch       2.1408228
## Major Favorite:Average Touch-Major Underdog:Low Touch      3.0750000
## Major Underdog:High Touch-Major Underdog:Low Touch         1.2750000
## Mild Underdog:High Touch-Major Underdog:Low Touch          2.3564815
## Mild Favorite:High Touch-Major Underdog:Low Touch          2.6845238
## Major Favorite:High Touch-Major Underdog:Low Touch         2.8750000
## Mild Favorite:Low Touch-Mild Underdog:Low Touch            0.6386555
## Major Favorite:Low Touch-Mild Underdog:Low Touch           2.6029412
## Major Underdog:Average Touch-Mild Underdog:Low Touch       0.2729412
## Mild Underdog:Average Touch-Mild Underdog:Low Touch        0.9878618
## Mild Favorite:Average Touch-Mild Underdog:Low Touch        1.6187640
## Major Favorite:Average Touch-Mild Underdog:Low Touch       2.5529412
## Major Underdog:High Touch-Mild Underdog:Low Touch          0.7529412
## Mild Underdog:High Touch-Mild Underdog:Low Touch           1.8344227
## Mild Favorite:High Touch-Mild Underdog:Low Touch           2.1624650
## Major Favorite:High Touch-Mild Underdog:Low Touch          2.3529412
## Major Favorite:Low Touch-Mild Favorite:Low Touch           1.9642857
## Major Underdog:Average Touch-Mild Favorite:Low Touch      -0.3657143
## Mild Underdog:Average Touch-Mild Favorite:Low Touch        0.3492063
## Mild Favorite:Average Touch-Mild Favorite:Low Touch        0.9801085
## Major Favorite:Average Touch-Mild Favorite:Low Touch       1.9142857
## Major Underdog:High Touch-Mild Favorite:Low Touch          0.1142857
## Mild Underdog:High Touch-Mild Favorite:Low Touch           1.1957672
## Mild Favorite:High Touch-Mild Favorite:Low Touch           1.5238095
## Major Favorite:High Touch-Mild Favorite:Low Touch          1.7142857
## Major Underdog:Average Touch-Major Favorite:Low Touch     -2.3300000
## Mild Underdog:Average Touch-Major Favorite:Low Touch      -1.6150794
## Mild Favorite:Average Touch-Major Favorite:Low Touch      -0.9841772
## Major Favorite:Average Touch-Major Favorite:Low Touch     -0.0500000
## Major Underdog:High Touch-Major Favorite:Low Touch        -1.8500000
## Mild Underdog:High Touch-Major Favorite:Low Touch         -0.7685185
## Mild Favorite:High Touch-Major Favorite:Low Touch         -0.4404762
## Major Favorite:High Touch-Major Favorite:Low Touch        -0.2500000
## Mild Underdog:Average Touch-Major Underdog:Average Touch   0.7149206
## Mild Favorite:Average Touch-Major Underdog:Average Touch   1.3458228
## Major Favorite:Average Touch-Major Underdog:Average Touch  2.2800000
## Major Underdog:High Touch-Major Underdog:Average Touch     0.4800000
## Mild Underdog:High Touch-Major Underdog:Average Touch      1.5614815
## Mild Favorite:High Touch-Major Underdog:Average Touch      1.8895238
## Major Favorite:High Touch-Major Underdog:Average Touch     2.0800000
## Mild Favorite:Average Touch-Mild Underdog:Average Touch    0.6309021
## Major Favorite:Average Touch-Mild Underdog:Average Touch   1.5650794
## Major Underdog:High Touch-Mild Underdog:Average Touch     -0.2349206
## Mild Underdog:High Touch-Mild Underdog:Average Touch       0.8465608
## Mild Favorite:High Touch-Mild Underdog:Average Touch       1.1746032
## Major Favorite:High Touch-Mild Underdog:Average Touch      1.3650794
## Major Favorite:Average Touch-Mild Favorite:Average Touch   0.9341772
## Major Underdog:High Touch-Mild Favorite:Average Touch     -0.8658228
## Mild Underdog:High Touch-Mild Favorite:Average Touch       0.2156587
## Mild Favorite:High Touch-Mild Favorite:Average Touch       0.5437010
## Major Favorite:High Touch-Mild Favorite:Average Touch      0.7341772
## Major Underdog:High Touch-Major Favorite:Average Touch    -1.8000000
## Mild Underdog:High Touch-Major Favorite:Average Touch     -0.7185185
## Mild Favorite:High Touch-Major Favorite:Average Touch     -0.3904762
## Major Favorite:High Touch-Major Favorite:Average Touch    -0.2000000
## Mild Underdog:High Touch-Major Underdog:High Touch         1.0814815
## Mild Favorite:High Touch-Major Underdog:High Touch         1.4095238
## Major Favorite:High Touch-Major Underdog:High Touch        1.6000000
## Mild Favorite:High Touch-Mild Underdog:High Touch          0.3280423
## Major Favorite:High Touch-Mild Underdog:High Touch         0.5185185
## Major Favorite:High Touch-Mild Favorite:High Touch         0.1904762
##                                                                   lwr       upr
## Mild Underdog:Low Touch-Major Underdog:Low Touch          -1.62016744 2.6642851
## Mild Favorite:Low Touch-Major Underdog:Low Touch          -1.42521358 3.7466421
## Major Favorite:Low Touch-Major Underdog:Low Touch          0.06528889 6.1847111
## Major Underdog:Average Touch-Major Underdog:Low Touch     -1.23458274 2.8245827
## Mild Underdog:Average Touch-Major Underdog:Low Touch      -0.36541366 3.3852549
## Mild Favorite:Average Touch-Major Underdog:Low Touch       0.28700998 3.9946356
## Major Favorite:Average Touch-Major Underdog:Low Touch      1.08684167 5.0631583
## Major Underdog:High Touch-Major Underdog:Low Touch        -1.57343603 4.1234360
## Mild Underdog:High Touch-Major Underdog:Low Touch          0.34520425 4.3677587
## Mild Favorite:High Touch-Major Underdog:Low Touch          0.60861081 4.7604368
## Major Favorite:High Touch-Major Underdog:Low Touch        -0.18471111 5.9347111
## Mild Favorite:Low Touch-Mild Underdog:Low Touch           -1.60521060 2.8825215
## Major Favorite:Low Touch-Mild Underdog:Low Touch          -0.17370141 5.3795838
## Major Underdog:Average Touch-Mild Underdog:Low Touch      -1.29776507 1.8436474
## Mild Underdog:Average Touch-Mild Underdog:Low Touch       -0.37771159 2.3534352
## Mild Favorite:Average Touch-Mild Underdog:Low Touch        0.28289949 2.9546284
## Major Favorite:Average Touch-Mild Underdog:Low Touch       1.03614009 4.0697423
## Major Underdog:High Touch-Mild Underdog:Low Touch         -1.78900685 3.2948892
## Mild Underdog:High Touch-Mild Underdog:Low Touch           0.28744235 3.3814030
## Mild Favorite:High Touch-Mild Underdog:Low Touch           0.53233409 3.7925959
## Major Favorite:High Touch-Mild Underdog:Low Touch         -0.42370141 5.1295838
## Major Favorite:Low Touch-Mild Favorite:Low Touch          -1.16742845 5.0959999
## Major Underdog:Average Touch-Mild Favorite:Low Touch      -2.50230214 1.7708736
## Mild Underdog:Average Touch-Mild Favorite:Low Touch       -1.64144185 2.3398545
## Mild Favorite:Average Touch-Mild Favorite:Low Touch       -0.99027812 2.9504951
## Major Favorite:Average Touch-Mild Favorite:Low Touch      -0.18299230 4.0115637
## Major Underdog:High Touch-Mild Favorite:Low Touch         -2.81135769 3.0399291
## Mild Underdog:High Touch-Mild Favorite:Low Touch          -0.92343964 3.3149740
## Mild Favorite:High Touch-Mild Favorite:Low Touch          -0.65683632 3.7044554
## Major Favorite:High Touch-Mild Favorite:Low Touch         -1.41742845 4.8459999
## Major Underdog:Average Touch-Major Favorite:Low Touch     -5.02069077 0.3606908
## Mild Underdog:Average Touch-Major Favorite:Low Touch      -4.19141192 0.9612532
## Mild Favorite:Average Touch-Major Favorite:Low Touch      -3.54488661 1.5765322
## Major Favorite:Average Touch-Major Favorite:Low Touch     -2.70958347 2.6095835
## Major Underdog:High Touch-Major Favorite:Low Touch        -5.20174559 1.5017456
## Mild Underdog:High Touch-Major Favorite:Low Touch         -3.44542845 1.9083914
## Mild Favorite:High Touch-Major Favorite:Low Touch         -3.16628349 2.2853311
## Major Favorite:High Touch-Major Favorite:Low Touch        -3.78305007 3.2830501
## Mild Underdog:Average Touch-Major Underdog:Average Touch  -0.46612274 1.8959640
## Mild Favorite:Average Touch-Major Underdog:Average Touch   0.19925982 2.4923857
## Major Favorite:Average Touch-Major Underdog:Average Touch  0.92694484 3.6330552
## Major Underdog:High Touch-Major Underdog:Average Touch    -1.96776889 2.9277689
## Mild Underdog:High Touch-Major Underdog:Average Touch      0.17467911 2.9482839
## Mild Favorite:High Touch-Major Underdog:Average Touch      0.41053784 3.3685098
## Major Favorite:High Touch-Major Underdog:Average Touch    -0.61069077 4.7706908
## Mild Favorite:Average Touch-Mild Underdog:Average Touch   -0.21306422 1.4748685
## Major Favorite:Average Touch-Mild Underdog:Average Touch   0.45673336 2.6734254
## Major Underdog:High Touch-Mild Underdog:Average Touch     -2.55639541 2.0865541
## Mild Underdog:High Touch-Mild Underdog:Average Touch      -0.30274043 1.9958621
## Mild Favorite:High Touch-Mild Underdog:Average Touch      -0.08439329 2.4335996
## Major Favorite:High Touch-Mild Underdog:Average Touch     -1.21125319 3.9414119
## Major Favorite:Average Touch-Mild Favorite:Average Touch  -0.13735163 2.0057061
## Major Underdog:High Touch-Mild Favorite:Average Touch     -3.16994698 1.4383014
## Mild Underdog:High Touch-Mild Favorite:Average Touch      -0.89817998 1.3294974
## Mild Favorite:High Touch-Mild Favorite:Average Touch      -0.68300810 1.7704102
## Major Favorite:High Touch-Mild Favorite:Average Touch     -1.82653218 3.2948866
## Major Underdog:High Touch-Major Favorite:Average Touch    -4.21353267 0.6135327
## Mild Underdog:High Touch-Major Favorite:Average Touch     -2.04395740 0.6069204
## Mild Favorite:High Touch-Major Favorite:Average Touch     -1.81208335 1.0311310
## Major Favorite:High Touch-Major Favorite:Average Touch    -2.85958347 2.4595835
## Mild Underdog:High Touch-Major Underdog:High Touch        -1.35113080 3.5140938
## Mild Favorite:High Touch-Major Underdog:High Touch        -1.07679499 3.8958426
## Major Favorite:High Touch-Major Underdog:High Touch       -1.75174559 4.9517456
## Mild Favorite:High Touch-Mild Underdog:High Touch         -1.12572157 1.7818062
## Major Favorite:High Touch-Mild Underdog:High Touch        -2.15839141 3.1954284
## Major Favorite:High Touch-Mild Favorite:High Touch        -2.53533111 2.9162835
##                                                               p adj
## Mild Underdog:Low Touch-Major Underdog:Low Touch          0.9996889
## Mild Favorite:Low Touch-Major Underdog:Low Touch          0.9450502
## Major Favorite:Low Touch-Major Underdog:Low Touch         0.0404131
## Major Underdog:Average Touch-Major Underdog:Low Touch     0.9797030
## Mild Underdog:Average Touch-Major Underdog:Low Touch      0.2554066
## Mild Favorite:Average Touch-Major Underdog:Low Touch      0.0093229
## Major Favorite:Average Touch-Major Underdog:Low Touch     0.0000405
## Major Underdog:High Touch-Major Underdog:Low Touch        0.9460894
## Mild Underdog:High Touch-Major Underdog:Low Touch         0.0076307
## Mild Favorite:High Touch-Major Underdog:Low Touch         0.0016316
## Major Favorite:High Touch-Major Underdog:Low Touch        0.0881838
## Mild Favorite:Low Touch-Mild Underdog:Low Touch           0.9986595
## Major Favorite:Low Touch-Mild Underdog:Low Touch          0.0899273
## Major Underdog:Average Touch-Mild Underdog:Low Touch      0.9999894
## Mild Underdog:Average Touch-Mild Underdog:Low Touch       0.4201335
## Mild Favorite:Average Touch-Mild Underdog:Low Touch       0.0046684
## Major Favorite:Average Touch-Mild Underdog:Low Touch      0.0000044
## Major Underdog:High Touch-Mild Underdog:Low Touch         0.9980730
## Mild Underdog:High Touch-Mild Underdog:Low Touch          0.0064287
## Mild Favorite:High Touch-Mild Underdog:Low Touch          0.0010389
## Major Favorite:High Touch-Mild Underdog:Low Touch         0.1887080
## Major Favorite:Low Touch-Mild Favorite:Low Touch          0.6463470
## Major Underdog:Average Touch-Mild Favorite:Low Touch      0.9999909
## Mild Underdog:Average Touch-Mild Favorite:Low Touch       0.9999883
## Mild Favorite:Average Touch-Mild Favorite:Low Touch       0.8928466
## Major Favorite:Average Touch-Mild Favorite:Low Touch      0.1115921
## Major Underdog:High Touch-Mild Favorite:Low Touch         1.0000000
## Mild Underdog:High Touch-Mild Favorite:Low Touch          0.7827499
## Mild Favorite:High Touch-Mild Favorite:Low Touch          0.4767355
## Major Favorite:High Touch-Mild Favorite:Low Touch         0.8144886
## Major Underdog:Average Touch-Major Favorite:Low Touch     0.1637013
## Mild Underdog:Average Touch-Major Favorite:Low Touch      0.6471214
## Mild Favorite:Average Touch-Major Favorite:Low Touch      0.9824915
## Major Favorite:Average Touch-Major Favorite:Low Touch     1.0000000
## Major Underdog:High Touch-Major Favorite:Low Touch        0.8061549
## Mild Underdog:High Touch-Major Favorite:Low Touch         0.9985492
## Mild Favorite:High Touch-Major Favorite:Low Touch         0.9999950
## Major Favorite:High Touch-Major Favorite:Low Touch        1.0000000
## Mild Underdog:Average Touch-Major Underdog:Average Touch  0.6963312
## Mild Favorite:Average Touch-Major Underdog:Average Touch  0.0074356
## Major Favorite:Average Touch-Major Underdog:Average Touch 0.0000042
## Major Underdog:High Touch-Major Underdog:Average Touch    0.9999637
## Mild Underdog:High Touch-Major Underdog:Average Touch     0.0130461
## Mild Favorite:High Touch-Major Underdog:Average Touch     0.0020066
## Major Favorite:High Touch-Major Underdog:Average Touch    0.3149634
## Mild Favorite:Average Touch-Mild Underdog:Average Touch   0.3672151
## Major Favorite:Average Touch-Mild Underdog:Average Touch  0.0003107
## Major Underdog:High Touch-Mild Underdog:Average Touch     1.0000000
## Mild Underdog:High Touch-Mild Underdog:Average Touch      0.3908623
## Mild Favorite:High Touch-Mild Underdog:Average Touch      0.0935777
## Major Favorite:High Touch-Mild Underdog:Average Touch     0.8447842
## Major Favorite:Average Touch-Mild Favorite:Average Touch  0.1563182
## Major Underdog:High Touch-Mild Favorite:Average Touch     0.9853468
## Mild Underdog:High Touch-Mild Favorite:Average Touch      0.9999681
## Mild Favorite:High Touch-Mild Favorite:Average Touch      0.9496491
## Major Favorite:High Touch-Mild Favorite:Average Touch     0.9985668
## Major Underdog:High Touch-Major Favorite:Average Touch    0.3709461
## Mild Underdog:High Touch-Major Favorite:Average Touch     0.8239407
## Mild Favorite:High Touch-Major Favorite:Average Touch     0.9990347
## Major Favorite:High Touch-Major Favorite:Average Touch    1.0000000
## Mild Underdog:High Touch-Major Underdog:High Touch        0.9485684
## Mild Favorite:High Touch-Major Underdog:High Touch        0.7775229
## Major Favorite:High Touch-Major Underdog:High Touch       0.9172329
## Mild Favorite:High Touch-Mild Underdog:High Touch         0.9998529
## Major Favorite:High Touch-Mild Underdog:High Touch        0.9999679
## Major Favorite:High Touch-Mild Favorite:High Touch        1.0000000
#Summary Table of Means and SDs per Group
Underdog_Observed_ANOVA %>%
  group_by(SpreadGroup, TouchGroup) %>%
  summarise(
    Mean_GD = mean(GoalDiff, na.rm = TRUE),
    SD_GD = sd(GoalDiff, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  arrange(SpreadGroup, TouchGroup)
## # A tibble: 12 × 5
##    SpreadGroup    TouchGroup    Mean_GD SD_GD     n
##    <fct>          <fct>           <dbl> <dbl> <int>
##  1 Major Underdog Low Touch      -1.88   1.89     8
##  2 Major Underdog Average Touch  -1.08   1.41    25
##  3 Major Underdog High Touch     -0.6    1.67     5
##  4 Mild Underdog  Low Touch      -1.35   1.06    17
##  5 Mild Underdog  Average Touch  -0.365  1.52    63
##  6 Mild Underdog  High Touch      0.481  1.53    27
##  7 Mild Favorite  Low Touch      -0.714  1.70     7
##  8 Mild Favorite  Average Touch   0.266  1.51    79
##  9 Mild Favorite  High Touch      0.810  1.57    21
## 10 Major Favorite Low Touch       1.25   1.71     4
## 11 Major Favorite Average Touch   1.2    1.61    30
## 12 Major Favorite High Touch      1      1.41     4
#ANOVA categorically dos not say that spread AND touch together affects goal differential
#It does say that underdogs are more likely to lose (duh) and that more touch is better (duh)
#Look at GAM Model to see other stuff, there it creates a a relationship between the three values

############################ GAM Model | Underdog Hypothesis ############################

#This is asking: If a team is more/less touchy than usual, and they are underdog/overdog, how does this impact the goal differenetial?

# Fit GAM model to allow for nonlinear effects

# Changing k = 15 creates completely flattens slope
gam_model <- gam(
  GoalDiff ~ s(Spread, ScaledTouch, k = 100, bs = "tp"),  # increase k for smoother fit
  data = Underdog_Analysis
)

# Create grid for predictions
spread_seq <- seq(min(Underdog_Analysis$Spread, na.rm = TRUE),
                  max(Underdog_Analysis$Spread, na.rm = TRUE), length.out = 50)
touch_seq <- seq(min(Underdog_Analysis$ScaledTouch, na.rm = TRUE),
                 max(Underdog_Analysis$ScaledTouch, na.rm = TRUE), length.out = 50)

grid <- expand.grid(Spread = spread_seq, ScaledTouch = touch_seq)
grid$GoalDiff <- predict(gam_model, newdata = grid)

# Convert to matrix for surface
z_matrix <- matrix(grid$GoalDiff, nrow = length(spread_seq), ncol = length(touch_seq))

# 3D Plot: See below in next R chunk
#Note: Each dot represents one match outcome for a team, therefore 2 dots for each match
# plot_ly() %>%
#   add_surface(
#     x = ~spread_seq,
#     y = ~touch_seq,
#     z = ~z_matrix,
#     colorscale = list(
#       c(0, "red"),  # red for losses
#       c(1, "green")   # green for wins
#     ),
#     cmin = min(Underdog_Analysis$GoalDiff, na.rm = TRUE),
#     cmax = max(Underdog_Analysis$GoalDiff, na.rm = TRUE),
#     opacity = 0.7,
#     showscale = TRUE
#   ) %>%
#   add_markers(
#     data = Underdog_Analysis,
#     x = ~Spread,
#     y = ~ScaledTouch,
#     z = ~GoalDiff,
#     marker = list(
#       size = 3,
#       color = ~GoalDiff,
#       colorscale = list(c(0, "#ff0000"), c(1, "#00ff00")),  # flipped: red = low, green = high
#       cmin = min(Underdog_Analysis$GoalDiff, na.rm = TRUE),
#       cmax = max(Underdog_Analysis$GoalDiff, na.rm = TRUE)
#     ),
#     name = "Observed"
#   ) %>%
#   layout(
#     title = "Underdog Hypothesis: Spread x Touch Deviation x Goal Differential",
#     scene = list(
#       xaxis = list(title = "Spread (Opponent Rank - Team Rank)"),
#       yaxis = list(title = "Scaled Touch Deviation"),
#       zaxis = list(title = "Goal Differential")
#     )
#   )


############################ GAM Model CATEGORICAL Table Summary (Interpretation of GAM) | Underdog Hypothesis ############################

# Evaluates how a team's goal differential is predicated by a model across a spectrum of two predictors:
# Predictor One = Ranking Spread
# Predictor Two = Scaled Touch Deviation (how much more or less physical touch a team used compared to their norm)

# Backpedals to the observed table CATEGORICAL idea. 

# Define a grid of Spread and ScaledTouch values
# Create a sequence of 100 evenly spaced values from smallest to largest spread
spread_vals <- seq(min(Underdog_Analysis$Spread, na.rm = TRUE),
                   max(Underdog_Analysis$Spread, na.rm = TRUE),
                   length.out = 100)

# Create a sequence of 100 evenly spaced values from smallest to largest touch
touch_vals <- seq(min(Underdog_Analysis$ScaledTouch, na.rm = TRUE),
                  max(Underdog_Analysis$ScaledTouch, na.rm = TRUE),
                  length.out = 100)

# Create a ten thousand row data frame by combining all hundred by hundred values above
grid <- expand.grid(Spread = spread_vals, ScaledTouch = touch_vals)

# Predict GoalDiff across the grid on each fake game (ten thousand of them)
grid$PredictedGoalDiff <- predict(gam_model, newdata = grid)

#Note choice of 7 as the cutoff is somewhat arbitrary
grid_summary <- grid %>%
  mutate(
    SpreadGroup = case_when(
      Spread <= -spread_cutoff ~ "Major Underdog",
      Spread > -spread_cutoff & Spread < 0 ~ "Mild Underdog",
      Spread == 0 ~ "Even",
      Spread > 0 & Spread < spread_cutoff ~ "Mild Favorite",
      Spread >= spread_cutoff ~ "Major Favorite"
    ),
    TouchGroup = case_when(
      ScaledTouch >= 1 ~ "High Touch",
      ScaledTouch <= -1 ~ "Low Touch",
      TRUE ~ "Average Touch"
    )
  ) %>%
  group_by(SpreadGroup, TouchGroup) %>%
  summarise(
    MeanPredGD = mean(PredictedGoalDiff, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(SpreadGroup, TouchGroup)

#Bar graph to see the GAM model major/mild underdogs versus tables
# Set same factor levels for consistency
grid_summary <- grid_summary %>%
  mutate(
    SpreadGroup = factor(SpreadGroup, levels = spread_levels),
    TouchGroup = factor(TouchGroup, levels = touch_levels)
  )

# Plot GAM model data
GAM_Model_Data_Plot <- ggplot(grid_summary, aes(x = SpreadGroup, y = MeanPredGD, fill = TouchGroup)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  scale_fill_brewer(palette = "Blues") +
  labs(
    title = "GAM Model Predicted Goal Differential by Underdog/Favorite Status and Touch Level",
    x = "Underdog/Favorite Status (Spread Group)",
    y = "Predicted Goal Differential",
    fill = "Touch Level"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

#Output for Hypothesis 3: